#!/usr/bin/python
# -*- coding: utf-8 -*-

import urllib
import urllib2
import re

# Script: download one blog page and print every article link found in it.

url = 'http://jinnianshilongnian.iteye.com/blog/2018398'

# Send a browser-like User-Agent header with the request.
# NOTE(review): presumably the site rejects requests that lack one -- confirm.
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
headers = {'User-Agent': user_agent}

# Matches a <p style="font-size: 14px;"> element that wraps a single <a> whose
# href starts with "/blog/" and whose target attribute starts with "_".
# The only capture group is the href, so findall() returns a list of hrefs.
# re.S lets "." (and therefore ".*?") span line breaks.
# A raw string keeps the "\s" escapes from being treated as string escapes.
pattern = re.compile(
    r'<p.?style="font-size: 14px;">\s*?<a\s?href="(/blog/.*?)"\starget="_.{0,6}">.*?</a></p>',
    re.S)

request = urllib2.Request(url, headers=headers)
try:
    response = urllib2.urlopen(request)
    try:
        content = response.read().decode('utf-8')
    finally:
        # Always release the underlying socket, even if read/decode fails.
        response.close()
except urllib2.URLError as e:
    # HTTPError (a URLError subclass) carries an HTTP status in `code`;
    # print whichever diagnostic attributes the exception actually has.
    if hasattr(e, 'code'):
        print(e.code)
    if hasattr(e, 'reason'):
        print(e.reason)
else:
    # Only reached when the page was fetched and decoded successfully.
    items = pattern.findall(content)
    for x in items:
        print(x)
